from pdfminer.pdfinterp import PDFResourceManager
from pdfminer.pdfinterp import process_pdf
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from io import StringIO
from docx import Document


# Extract the text of ./mypdf.pdf into ``content``.
#
# TextConverter writes everything it extracts into ``return_str``; the
# accumulated text is read back once processing has finished.
resource_manager = PDFResourceManager()
return_str = StringIO()
lap_params = LAParams()

# The ``with`` block closes the PDF handle even when parsing fails; the
# original left the file open for the lifetime of the process.
with open("./mypdf.pdf", 'rb') as file:
    device = TextConverter(resource_manager, return_str, laparams=lap_params)
    try:
        process_pdf(resource_manager, device, file)
    finally:
        # Always release the converter, including on a parse error.
        device.close()

content = return_str.getvalue()


def remove_control_characters(content):
    """Return *content* with every character below U+0020 removed.

    All code points 0 through 31 are deleted, which includes tab, line
    feed, carriage return and form feed. Characters from the space
    (U+0020) upwards are left untouched.
    """
    # Mapping a code point to None tells str.translate to drop it.
    deletions = {code_point: None for code_point in range(32)}
    stripped = content.translate(deletions)
    return stripped


# Write the extracted text to mypdf.docx, one paragraph per '\n'-separated
# line, with control characters stripped from each line first.
doc = Document()
cleaned_lines = map(remove_control_characters, content.split('\n'))
for text in cleaned_lines:
    # An explicit run is added even for empty lines, so every input line
    # yields a paragraph containing exactly one run.
    doc.add_paragraph().add_run(text)
doc.save("mypdf.docx")

